29/12/20 v2 PH
Implemented covariance VMI analysis routines (includes filtering).
10/12/20 v1
Basic correlation coefficient for ion and electron channels (raw data, only gas on/off filtering).
So far: apply covariance (correlation coefficient) calculation to intensities (ion tof) and electron counts per shot.
Uses numpy.corrcoef (https://numpy.org/devdocs/reference/generated/numpy.corrcoef.html) to compute the matrices, which returns normalised covariance (correlation coefficients).
# --- Imports & environment setup (notebook-style script; magics are Jupyter-only) ---
import numpy as np
from h5py import File
from pathlib import Path
# HV imports
import holoviews as hv
from holoviews import opts
hv.extension('bokeh', 'matplotlib')
import xarray as xr
# Memory profiler - OPTIONAL for testing
# https://timothymonteath.com/articles/monitoring_memory_usage/
# %load_ext memory_profiler
# %memit
# Quick hack for slow internet connection!
# NOTE: IPython line magic - only valid inside a Jupyter/IPython session.
%autosave 36000
# Also image stacks can be problematic
showImgStacks = False  # Toggle for (slow/memory-heavy) image stack display
# Import class - this requires pointing to the `tmoDataBase.py` file.
# See https://github.com/phockett/tmo-dev
# Import class from file
import sys
sys.path.append(r'/cds/home/p/phockett/dev/tmo-dev/tmo')
import tmoDataBase as tb
# from vmi import VMI # VMI class - currently has multiple filtering, ish.
# from inversion import VMIproc # VMI processing class
from correlations import corr # Correlations class
tb.setPlotDefaults() # Set plot defaults (optional)
# tb.setPlotDefaults() # Set plot defaults (optional)
# Set main data path
baseDir = Path('/reg/data/ana15/tmo/tmolw0618/scratch/preproc/v5')
keys = range(81,87+1)  # Run numbers 81-87 inclusive
# Setup class object and which runs to read.
# data = VMIproc(fileBase = baseDir, runList = range(81,87+1), fileSchema='_v5')
# data = VMIproc(fileBase = baseDir, runList = keys, fileSchema='_v5') # Testing with single dataset
# data = VMI(fileBase = baseDir, runList = keys, fileSchema='_v5') # Testing with single dataset
data = corr(fileBase = baseDir, runList = keys, fileSchema='_v5') # Testing with multiple datasets
# The class has a bunch of dictionaries holding info on the data
# data.runs['files']
# Read in the files with h5py
# There is very basic checking implemented currently, just to see if 'energies' is present.
data.readFiles()
# Simple filter - set for gas on only
# TODO: implement multiple filter sets here.
# data.setFilter(reset = True)
# NOTE(review): 'evrs':[1,1,70] presumably selects gas-on shots via event code 70 - confirm against class docs.
data.setFilter({'gas on':{'evrs':[1,1,70]}}, reset = True) # , 'gas off':{'evrs':[0,0,70]}})
data.filterData()
# Run corrRun() to calculate covariance matrices for each run
data.corrRun()
data.hmap  # Display the correlation-matrix HoloMap (notebook display cell)
data.hmap.layout().cols(2)  # Same plot, laid out in two columns
There are some strong correlations with time here — is there anything physically interesting in them?
TODO: investigate the time-dependent correlations further.
# data.setFilter()
# Repeat the correlation analysis for gas-off shots (event-code selection inverted).
data.setFilter({'gas off':{'evrs':[0,0,70]}}, reset = True) # , 'gas off':{'evrs':[0,0,70]}})
data.filterData()
data.corrRun()
data.hmap  # Display the gas-off correlation plot
In the current implementation this will calculate covariances for electron (x,y) data with another data variable, e.g. ion intensities, on a per-shot basis. The routine uses numpy.corrcoef (https://numpy.org/devdocs/reference/generated/numpy.corrcoef.html) to calculate correlations for each (x,y) pixel with each other data dimension set (e.g. multiple ion species). It's quite slow, but amenable to massive parallelisation with some effort.
To make the code simpler, the data is first converted to a sparse array (using the Sparse library); standard 3D array referencing is then possible with a minimal memory footprint. It also gives a nice array summary output.
# Set metrics
# data.runMetrics()
# For testing, set filter first...
# Simple filter - set for gas on only
# TODO: implement multiple filter sets here.
data.setFilter(reset = True)  # Clear any existing filter sets first
data.setFilter({'unsat':{'evrs':[1,1,70], 'eShot':[0,2000,0], 'desc':'Unsaturated signal, electron counts < 2000.'}}) #'gas on':{'evrs':[1,1,70]}, 'gas off':{'evrs':[0,0,70]}})
# Gas on/off, set event code #70. Unsaturated signal, check electron counts < 2000.
data.filterData()
data.filters  # Display current filter definitions
# Generate covariance VMI images.
# This takes a while (~30 mins for test dataset) with current implementation, which is looped over pixel and correlated datasets (default is 'intensities' data).
# Note that the default includes x4 downsampling on the (x,y) pixels.
# NOTE(review): downsampleRatios=[2,2,1] here, i.e. x2 on (x,y) - the x4 note above refers to the routine's default; confirm.
data.genCorrVMIXmulti(saveSparse=True, downsampleRatios = [2,2,1])
*** TESTING: results seem OK, but the filters are not working so far. Possibly a super() issue, or data being accidentally overwritten?
# Show images
data.showImgSet(dims = ['xc','yc'])  # Plot the covariance VMI image set over (xc,yc)
data.imgStack  # Display the underlying image stack (xarray)
# Quick save...
# data.corrStack.to_netcdf("LW06_corrStack_test_040121.nc") # Fails on attribs
# Work on a copy and strip attrs, since netCDF serialisation fails on the original attribs.
data.corrStackCP = data.corrStack.copy()
data.corrStackCP['unsat'].attrs = {}
data.corrStackCP.to_netcdf("LW06_corrStack_test_040121.nc") # Runs... 72Mb at 2,2 resample
# ds_disk = xr.open_dataset("LW06_corrStack_test_040121.nc") to load
# Shell magic (Jupyter only): list files to confirm the save.
!ls -ll
See the VMIproc class guide for details.
# For cpbasex, set the basis path
basisPath = r'/reg/d/psdm/tmo/tmolw0618/scratch/results/tmo_ana/calc/G_r512_k128_l4.h5'
# Set also local version - pip installed version not working with loadG in testing (version mismatch issue, or something in import routine?)
pbasexPath = r'/reg/d/psdm/tmo/tmolw0618/results/modules/pbasex' #/pbasex'
# Import module + basis functions
data.setMethod(pbasexPath = pbasexPath, basisPath = basisPath)
# Q: will this work for data with additional dims...
filterSet = 'unsat'  # Filter set to invert below
# Invert a dataset
# data.setCentre(dims = ['xc','yc'], filterSet='gas on') # This sets [137, 5] with test data, checks wrong dim...?
# data.setCentre([137, 131], dims = ['xc','yc'], filterSet=filterSet) # 4,4 downsample
# data.setCentre([276, 274], dims = ['xc','yc'], filterSet=filterSet) # 2,2 downsample
# Manually set the image centre (pixel coords) for the inversion, 2,2 downsampled data.
data.setCentre([277, 275], filterSet=filterSet) # , dims = ['xc','yc'], filterSet=filterSet) # 2,2 downsample
# data.setCentre(filterSet=filterSet)
# data.checkCentre(dims = ['xc','yc'], name='gas on') # Currently not working? NOW FIXED below with imgStack replacement
data.centreInds  # Display the centre indices just set
# data.showImg(name=filterSet, dims = ['xc','yc'])
data.imgStack = data.corrStack[filterSet].sel({'intensities':3}).to_dataset() # For lower dims to imgStack for testing - should add dims into main routine
# test = data.showImg(name=filterSet, dims = ['xc','yc'], returnImg = True) # Testing direct img return - OK
data.checkCentre(name=filterSet) #, dims = ['yc','xc']) #, name=filterSet) # Currently not working? - FIXED if imgStack of reduced dims set above.
# data.inv(filterSet=filterSet) # Issue with dim stacking here - now have one too many!
# # For rapid plotting per run, use self.plotSpectra()
# data.plotSpectra()
# [dim for dim in data.imgStack.dims if dim not in ['xc', 'yc', 'run']]
# data.imgStack.var
# list(data.imgStack.groupby(corrDims[0]))
# Try wrapping inv routine for correlation dim
# Set corrStack to avoid overwrite - now set in main routine
# data.corrStack = data.imgStack.copy()
# data.corrStackBK = data.imgStack.copy()
filterSet = 'unsat'  # (Re)set the filter set name used by the loop below
# Check dims
# Any dims other than the image axes (xc, yc) and the run axis are correlation dims.
corrDims = [dim for dim in data.corrStack.dims if dim not in ['xc', 'yc', 'run']]
# if len(corrDims) > 1:
# print()
# for item in data.corrStack[corrDims]:
# print(item)
# data.imgStack.groupby(corrDims[0]).map(data.inv()) # This could work, but will need some wrapping
data.proc = {} # MAY ALREADY EXIST, for testing just clear
data.proc['corr'] = {}
# HACK - groupby and process each correlation set separately. Will then need to restack results.
# NOTE(review): loop-body indentation restored (lost in the flattened copy) - everything
# from print(item) to the data.proc['corr'] assignment runs once per correlation set.
for item in data.corrStack.groupby(corrDims[0]):
    print(item)
    # Force dim ordering for inversion routine - should back-propagate this!
    # NOTE ALSO DIM ORDERING convention, ugh - should pull from centreInds for consistency.
    data.imgStack = item[1].transpose('xc','yc',...)
    # data.imgStack = item[1].transpose('yc','xc',...)
    # Upsample image - with small data (255 px) getting some weird issues... might be due to radial masking?
    # FULL MANUAL!!! As per http://xarray.pydata.org/en/stable/generated/xarray.Dataset.interp.html#xarray.Dataset.interp
    # Auto-dim based code already written, somewhere...?
    # new_xc = np.linspace(ds.lon[0], ds.lon[-1], ds.dims["lon"] * 4)
    dsRatio = 2  # Upsampling ratio for (xc, yc)
    xcNew = np.linspace(data.imgStack.xc[0], data.imgStack.xc[-1], data.imgStack.dims["xc"] * dsRatio)
    ycNew = np.linspace(data.imgStack.yc[0], data.imgStack.yc[-1], data.imgStack.dims["yc"] * dsRatio)
    data.imgStack = data.imgStack.interp(xc=xcNew, yc=ycNew).fillna(0) # Note fillna to ensure no NaNs in output image stack.
    # Now working OK with upscaling, remember to reset centre coords!
    # data.setCentre([137, 131] * 4, dims = ['xc','yc'], filterSet=filterSet)
    # BUG FIX: `[277, 275] * dsRatio` is Python list repetition ([277, 275, 277, 275]),
    # NOT element-wise scaling - scale each centre coordinate explicitly instead.
    data.setCentre([277 * dsRatio, 275 * dsRatio], filterSet=filterSet)
    data.inv(filterSet=filterSet) # This runs, but currently throws errors on coord reassignment.
    # data.inv()
    # data.plotSpectra(filterSet=filterSet)
    # Stash the processed results for this correlation set, keyed by its coord value.
    data.proc['corr'][item[0]] = data.proc[filterSet].copy()
# Reset to full image stack
# data.imgStack = data.corrStack.copy()
# Restack
# TODO: add for fold, fit and inv too.
# Restack the per-correlation-set inversion results along a new 'intensities' dim.
xrSet = [data.proc['corr'][k]['xr'].expand_dims({'intensities':[k]}) for k in data.proc['corr'].keys()]
temp = xr.concat(xrSet, 'intensities')
data.proc[filterSet]['xr'] = temp #.to_dataset() # temp.rename('CorrXR').to_dataset()
#
# data.proc[filterSet]['xr'].name = 'CorrXR' #
# data.proc[filterSet]['xr'] = data.proc[filterSet]['xr'].to_dataset()
# data.proc[filterSet]['xr'] = xr.combine_nested(xrSet, 'intensities')
data.imgStack = data.corrStack.copy()  # Restore the full image stack after per-set processing
data.showImgSet(dims = ['yc','xc'])
# data.proc[filterSet]
# Set a radial mask for the spectra; threshold values here found by inspection.
data.setRmask(filterSet=filterSet, XSmin=1e-6, rPix = [0,5], eRange = [1,45]) # Without mask reset this is odd... and also after mask reset - not correctly using all dims?
# ISSUE seems to be with XS dim, but not clear why. Actually looks OK with lower threshold - unnorm values?
hmap = data.plotSpectra(filterSet=filterSet, useMask = True, returnMap = True) # Throws dim errors... NOW WORKING AFTER FIXING DATASTRUCTURE!
hmap  # Display spectra HoloMap
# TODO: fix ePlot slicing, assumes dims at the moment. #, ePlot=[1, 45])
# # Check routine...
# eSpecDS = hv.Dataset(data.proc[filterSet]['xr']) #.rename('CorrXR').to_dataset()) # ['corrXR']) # As dataarray get blank dataset returned
# hmap = eSpecDS.to(hv.Curve, kdims=['E'])
# # hmap = eSpecDS.to(hv.Curve, kdims=['E'], vdims=['BLM'])
# # hmap = eSpecDS.to(hv.Curve, kdims=['E'], vdims=['BLM'])
# # hmap = eSpecDS.to(hv.HoloMap, kdims=['E'])
# # TODO - remember how to do this properly!!! May need to stack as dict for multidim case?
# # AH - issue was actually coord assignments above. Now working
# # Try manual stacking, sigh
# # curveDict = {}
# # for run in eSpecDS['run'].data:
# #     curveDict.update({(BLM, run):hv.Curve(eSpecDS.select(BLM=BLM, run = run), kdims=['E']) for BLM in eSpecDS['BLM'].data})
# # hmap = hv.HoloMap(curveDict, kdims=['BLM','run'])
# # hmap
# Alternative layouts of the spectra: overlay curves by BLM per run, and vice versa.
hmap.overlay('BLM').layout('run').cols(1)
hmap.overlay('run').layout('BLM').cols(1)
# BUG FIX: eSpecDS was only defined in the commented-out check code above, so
# displaying it raised NameError - restore the definition before the display cell.
eSpecDS = hv.Dataset(data.proc[filterSet]['xr'])
eSpecDS
# NOTE(review): 'gas on' filter key used here, but current filters define 'unsat' - confirm key exists.
data.imgStack['gas on'].sel(run=81).plot.imshow()
imgIn = data.imgStack #['gas on'] # .data
# imgIn = data.restackVMIdataset(reduce=False)
dimStack = imgIn.dims
# imgIn.transpose(*dimStack[2:],*dimStack[:2]) # OK for da
imgIn.transpose('xc','yc',...)
dims = ['xc','yc']
step = [4,4]
# BUG FIX: resampleDims was only defined in commented-out code, so the final line
# raised NameError; also dict.pop() requires a key argument - use popitem() to
# pull an arbitrary (dim, step) pair instead.
resampleDims = {d:s for (d,s) in zip(dims,step)}
# imgIn.resample({d:s for (d,s) in zip(dims,step)}) #, boundary="trim") # Only single dim for resample
# imgIn.resample(resampleDims.pop())
# imgIn.resample(xc=2).interpolate("linear") # invalid freq error???
xcNew = np.linspace(imgIn.xc[0], imgIn.xc[-1], imgIn.dims["xc"] * 4)
imgIn.interp(xc=xcNew)
{d:s for (d,s) in zip(dims,step)}  # Display the dim:step mapping
resampleDims.popitem()
# data.proc[filterSet] #['fold']
# Non-image dims in reverse order; the slice drops element 0 (presumably the image dim - confirm).
dims = list(data.imgStack.dims)[-1:0:-1]
dims  # Display the selected dims
# NOTE(review): try/except body indentation restored (lost in the flattened copy).
try:
    # Index imgStack coords by the inverted-image size - works when sizes match.
    data.imgStack[dims[0]][np.arange(0, data.proc['gas on']['pbasex']['inv'].shape[0])]
except IndexError:
    # Sizes don't match (e.g. after resizing) - fall back to plain integer coords.
    np.arange(0, data.proc['gas on']['pbasex']['inv'].shape[0])
# try:
#     b0 = self.imgStack[dims[0]][np.arange(0,data.shape[0])]
#     b1 = self.imgStack[dims[1]][np.arange(0,data.shape[1])]
#     # Default to int coords, this allows for cases where sizes don't match (if resized for instance)
# except IndexError:
#     b0 = np.arange(0,data.shape[0])
#     b1 = np.arange(0,data.shape[1])
# Quick shape checks on the processed outputs.
data.proc['gas on']['pbasex']['fit'].shape[0]
data.proc['gas on']['fold'].shape
data.proc['corr'][0].keys()
import matplotlib.pyplot as plt
# Check filterset
# plt.imshow(data.proc[filterSet]['fold'][:,:,6])
# plt.imshow(data.proc[filterSet]['pbasex']['fit'][:,:,0])
# Check corr
corrSet = 3  # Correlation set (key into data.proc['corr']) to inspect
# plt.imshow(data.proc['corr'][corrSet]['fold'][:,:,0])
plt.imshow(data.proc['corr'][corrSet]['pbasex']['fit'][:,:,2])
plt.show()
plt.imshow(data.proc['corr'][corrSet-1]['pbasex']['fit'][:,:,2])  # Compare with the previous set
# Single-image check: rerun pbasex on one folded image directly.
fold = data.proc['gas on']['fold'][:,:,0]
out = data.cp.pbasex(fold, data.gBasis, alpha=3.59e-4, make_images=True)
out  # Display pbasex output
data.proc['gas on']['pbasex']['inv'][:,:,0].shape
data.corrStackBK = data.corrStack.copy()  # Backup before further testing
# Check restacking...
# inv routine runs on either:
# self.restackVMIdataset(reduce=False)
# self.imgStack[filterSet].data
# data.imgStack['gas on']
data.restackVMIdataset(reduce=False)
# tmo-dev class version
data.__version__
# Environment report for reproducibility (package versions).
import scooby
scooby.Report(additional=['holoviews', 'xarray', 'sparse'])